#############################################
#Comparison of volunteer behavior by assignment order
#############################################

#Start from a clean workspace: removes every object returned by ls() in the
#environment this script runs in, so nothing left over from an earlier
#session can leak into the analysis
rm(list = ls())

#Functions
#Project helpers. installPackageNotFound() and formatSig(), both used in this
#script, are not defined here, so they presumably come from this file -
#TODO confirm
source("functions.R")

#Packages
#NOTE(review): going by its name, installPackageNotFound() installs a missing
#package; the unqualified felm() call further down relies on lfe also being
#attached by it - confirm in functions.R. xtable is not called in this script.
installPackageNotFound("xtable")
installPackageNotFound("lfe")

#############################################
#Read in Voter data
#############################################

#Voter-level records
all.voters <- read.csv("voters.csv", stringsAsFactors = FALSE)

#Pair-level records
all.pairs <- read.csv("voter_pairs.csv", stringsAsFactors = FALSE)

#Keep only rows flagged same_household == 0; rows where the flag is missing
#are dropped as well
all.voters <- all.voters[all.voters$same_household %in% 0, ]
all.pairs <- all.pairs[all.pairs$same_household %in% 0, ]

#Demographic covariates (stems of the <name>_treatment / <name>_control
#columns in the pair file)
covariates <- c("Black", "Hispanic", "Asian", "Male", "Under30",
                "VotePropensity", "Partisanship")

#Average demographics across pairs: for every covariate, take the row-wise
#mean of its treatment and control columns and store it as <name>_average.
#No na.rm, so a pair with either value missing gets NA.
for(this.covariate in covariates){
  arm.columns <- paste0(this.covariate, c("_treatment", "_control"))
  all.pairs[, paste0(this.covariate, "_average")] <-
    unname(apply(all.pairs[, arm.columns], 1, mean))
}

#############################################
#Statistical tests for methods of contact
#############################################
#Behavior tests use treated voters only, and only those whose total_updates
#value is not missing
is.treated <- all.voters$Treatment %in% 1
has.updates <- !is.na(all.voters$total_updates)
voter.merge <- all.voters[is.treated & has.updates, ]

#Form fields: one binary indicator column per contact method
contact.methods <- c("Postcard", "Social_Media", "Email", "Phone",
                     "In_Person", "Text", "Other")
form.fields <- paste0(contact.methods, "_Binary")

#Pair covariates: distance plus the pair-averaged demographics
pair_covariates <- c("average_distance_miles",
                     paste0(covariates, "_average"))

#Test vars: voter-level outcomes first, pair-level covariates last. The
#table labels further down rely on this order.
voter.outcomes <- c(form.fields, "Method_Count", "days_elapsed",
                    "total_updates", "End_Status_In_Progress",
                    "End_Status_Success")
test.vars <- c(voter.outcomes, pair_covariates)

#Object for results: three rows per (sample, outcome), one for each of the
#assignment_factor levels 2, 3 and 4, holding that level's coefficient
#relative to the omitted first level
result.df <- c()

#Numeric columns taken from the coefficient table
numcols <- c("Estimate", "Cluster s.e.", "t value", "Pr(>|t|)")

#Coefficient rows kept from every model
order.terms <- paste0("factor(assignment_factor)", 2:4)

#Test for differences from pair 1, separately for each value of
#Volunteer_In_District
for(this.group in c(0, 1)){

  #Rows for this sample are collected in a list and bound once, instead of
  #growing a data frame inside the loop
  group.rows <- vector("list", length(test.vars))

  for(var.index in seq_along(test.vars)){
    this.var <- test.vars[var.index]

    #Choose model data (all pairs, or treated voters with updates) and the
    #felm formula suffix that requests clustering on volunteer_id
    if(this.var %in% pair_covariates){
      model_data <- all.pairs
      #Among the pair covariates only the distance model is clustered
      if(this.var == "average_distance_miles"){
        se_string <- "|0|0|volunteer_id"
      }else{
        se_string <- ""
      }
    }else{
      model_data <- voter.merge
      se_string <- "|0|0|volunteer_id"
    }

    #Run lm of the outcome on assignment-order dummies within this sample
    this.formula <- formula(paste0(this.var, "~factor(assignment_factor)",
                                   se_string))
    this.sample <- model_data[which(model_data$Volunteer_In_District ==
                                      this.group),]
    this.test <- summary(felm(this.formula, data = this.sample),
                         robust = TRUE)

    #Copy coefficients
    coef.out <- this.test$coefficients

    #Create rows: identifiers, sample size, then the coefficient columns
    this.row <- cbind.data.frame(covariate = rep(this.var, 3),
                                 sample = rep(this.group, 3),
                                 sample_size = this.test$N,
                                 coef.out[order.terms,],
                                 stringsAsFactors = FALSE)
    #The standard-error column is not named the same for clustered and
    #unclustered fits; give it a single name so the rows can be stacked
    names(this.row)[5] <- "Cluster s.e."

    group.rows[[var.index]] <- this.row
  }

  #Stack this sample's rows
  group.df <- do.call(rbind.data.frame, group.rows)

  #Convert to numeric
  group.df[numcols] <- lapply(group.df[numcols], as.numeric)

  #Adjust p-values for multiple tests. This is done on the current sample's
  #rows only, so every p-value is adjusted exactly once. Adjusting the whole
  #accumulated result.df on each pass (as before) re-adjusted the first
  #sample's already-adjusted p-values on the second pass and mixed them with
  #the second sample's raw ones.
  group.df[["Pr(>|t|)"]] <- p.adjust(group.df[["Pr(>|t|)"]], method = "fdr")

  #Add to df
  result.df <- rbind.data.frame(result.df, group.df)
}

#############################################
#Create table
#############################################

#Add assignment tag: every model contributes three consecutive rows (the
#coefficients for assignment_factor levels 2, 3 and 4), tagged 1 to 3. The
#pattern is repeated to the full row count explicitly rather than relying
#on silent recycling of a shorter vector.
stopifnot(nrow(result.df) %% 3 == 0)
result.df$assignment.order <- rep(1:3, length.out = nrow(result.df))

#Copy df
formatted.df <- result.df

#Format coef
formatted.df[,"Estimate"] <- sapply(formatted.df[,"Estimate"],
                                    FUN = function(x) formatSig(x, 2))

#Add stars. Thresholds run from loosest to strictest, so a later pass
#overwrites an earlier one and the strictest threshold met sets the marker.
star.cutoffs <- c("*" = 0.05, "**" = 0.01, "***" = 0.001)
for(this.star in names(star.cutoffs)){
  starred <- which(formatted.df[,"Pr(>|t|)"] < star.cutoffs[[this.star]])
  #Skip when nothing clears this threshold
  if(length(starred) > 0){
    formatted.df[starred, "Estimate"] <-
      paste0(formatSig(result.df[starred, "Estimate"], 2), this.star)
  }
}

#Keep only labels and coefficients
formatted.df <- formatted.df[,c("covariate", "sample", "Estimate",
                                "assignment.order")]

#Reshape to one row per (covariate, sample) with columns Estimate.1 to
#Estimate.3
formatted.wide <- reshape(formatted.df,
                          v.names = "Estimate",
                          timevar = "assignment.order",
                          idvar = c("covariate", "sample"),
                          direction = "wide")

#Widen out: sample 1 estimates on the left, sample 0 estimates on the right
estimate.cols <- paste0("Estimate.", 1:3)
wide.sample1 <- formatted.wide[which(formatted.wide$sample == 1),
                               c("covariate", estimate.cols)]
wide.sample0 <- formatted.wide[which(formatted.wide$sample == 0),
                               c("covariate", estimate.cols)]
#The two halves are joined by position, so they must list the same
#outcomes in the same order
stopifnot(identical(as.character(wide.sample1$covariate),
                    as.character(wide.sample0$covariate)))
result.wide <- cbind(wide.sample1, wide.sample0[, estimate.cols])

#Nice labels for table, looked up by variable name so that a change in the
#order of the tested variables cannot silently mislabel rows
nice.labels <- c(Postcard_Binary = "Postcard",
                 Social_Media_Binary = "Social Media",
                 Email_Binary = "Email",
                 Phone_Binary = "Phone",
                 In_Person_Binary = "In Person",
                 Text_Binary = "Text",
                 Other_Binary = "Other Method",
                 Method_Count = "Method Count",
                 days_elapsed = "Days Until First Status Update",
                 total_updates = "Total Number of Status Updates",
                 End_Status_In_Progress = "Last Status In Progress",
                 End_Status_Success = "Last Status Success",
                 average_distance_miles = "Distance",
                 Black_average = "Black",
                 Hispanic_average = "Hispanic",
                 Asian_average = "Asian",
                 Male_average = "Male",
                 Under30_average = "Under 30",
                 VotePropensity_average = "Vote Propensity",
                 Partisanship_average = "Partisanship")
stopifnot(all(as.character(result.wide$covariate) %in% names(nice.labels)))
result.wide$covariate <-
  unname(nice.labels[as.character(result.wide$covariate)])

#Add names: the first three estimate columns are sample == 1, the last
#three are sample == 0
names(result.wide) <- c("Covariate", "Original.Order2", "Original.Order3",
                        "Original.Order4+", "Alternate.Order2",
                        "Alternate.Order3", "Alternate.Order4+")
row.names(result.wide) <- c()

print(result.wide)
